// Copyright (C) 2005  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_TOKENIZER_KERNEL_1_CPp_
#define DLIB_TOKENIZER_KERNEL_1_CPp_
#include "dlib/tokenizer/tokenizer_kernel_1.h"

#include <climits>
#include <cstdio>
#include <iostream>

namespace dlib
{
  // ----------------------------------------------------------------------------------------

  tokenizer_kernel_1::tokenizer_kernel_1() : headset(0), bodyset(0), have_peeked(false)
  {
    try
    {
      // Allocate one flag per possible unsigned char value, i.e. indices
      // 0..UCHAR_MAX inclusive (UCHAR_MAX+1 entries).  The previous
      // UCHAR_MAX-sized allocation made headset[UCHAR_MAX] an out-of-bounds
      // read whenever the input stream contained the byte 0xFF, since
      // get_token() indexes with static_cast<unsigned char>(ch).
      //
      // The trailing () value-initializes every flag to false, so index
      // UCHAR_MAX stays well-defined even though clear() only rewrites
      // entries in [0, UCHAR_MAX).
      headset = new bool[UCHAR_MAX + 1]();
      bodyset = new bool[UCHAR_MAX + 1]();

      clear();
    }
    catch (...)
    {
      // delete[] on a null pointer is a no-op, so no null guards are needed.
      delete[] headset;
      delete[] bodyset;
      throw;
    }
  }

  // ----------------------------------------------------------------------------------------

  tokenizer_kernel_1::~tokenizer_kernel_1()
  {
    // Release the character-class lookup tables allocated by the constructor.
    delete[] headset;
    delete[] bodyset;
  }

  // ----------------------------------------------------------------------------------------

  void tokenizer_kernel_1::clear()
  {
    using namespace std;

    // Forget any attached stream and any token peeked ahead of the cursor.
    in = 0;
    streambuf = 0;
    have_peeked = false;

    // Restore the default identifier definition: an identifier starts with
    // a letter or underscore and may continue with letters, underscores,
    // or digits.
    head = "_" + lowercase_letters() + uppercase_letters();
    body = "_" + lowercase_letters() + uppercase_letters() + numbers();

    // Rebuild the per-character lookup tables from the strings above.
    unsigned long i = 0;
    while (i < UCHAR_MAX)
    {
      headset[i] = false;
      bodyset[i] = false;
      ++i;
    }

    string::size_type pos;
    for (pos = 0; pos < head.size(); ++pos)
      headset[static_cast<unsigned char>(head[pos])] = true;
    for (pos = 0; pos < body.size(); ++pos)
      bodyset[static_cast<unsigned char>(body[pos])] = true;
  }

  // ----------------------------------------------------------------------------------------

  void tokenizer_kernel_1::set_stream(std::istream& in_)
  {
    // Drop any token peeked from a previous stream, then cache both the
    // stream and its buffer (tokens are read straight off the streambuf).
    have_peeked = false;
    in = &in_;
    streambuf = in_.rdbuf();
  }

  // ----------------------------------------------------------------------------------------

  bool tokenizer_kernel_1::stream_is_set() const
  {
    // A stream counts as set once set_stream() has stored a non-null pointer.
    return in != 0;
  }

  // ----------------------------------------------------------------------------------------

  std::istream& tokenizer_kernel_1::get_stream() const
  {
    // Precondition: stream_is_set() is true; otherwise 'in' is null and
    // dereferencing it here is undefined behavior.
    return *in;
  }

  // ----------------------------------------------------------------------------------------

  void tokenizer_kernel_1::get_token(int& type, std::string& token)
  {
    // Reads the next token from the stream buffer into (type, token).  If
    // peek_type()/peek_token() already pulled a token ahead, that buffered
    // token is handed back instead (see the tail of this function).
    if (!have_peeked)
    {
      std::streambuf::int_type ch;
      // Consume one character; sbumpc() returns EOF at end of stream.
      ch = streambuf->sbumpc();

      switch (ch)
      {
        case EOF:
          type = END_OF_FILE;
          token.clear();
          return;

        case '\n':
          // A newline is always its own single-character token.
          type = END_OF_LINE;
          token = "\n";
          return;

        case '\r':
        case ' ':
        case '\t':
          // Collect a maximal run of non-newline whitespace.
          type = WHITE_SPACE;
          token = static_cast<char>(ch);
          // sgetc() peeks at the current character without consuming it;
          // snextc() consumes it and peeks at the following one.
          ch = streambuf->sgetc();
          while ((ch == ' ' || ch == '\t' || ch == '\r') && ch != EOF)
          {
            token += static_cast<char>(ch);
            ch = streambuf->snextc();
          }
          return;

        default:
          if (headset[static_cast<unsigned char>(ch)])
          {
            // Identifier: a head character followed by a maximal run of
            // body characters (tables built in clear()/set_identifier_token()).
            type = IDENTIFIER;
            token = static_cast<char>(ch);
            ch = streambuf->sgetc();
            while (ch != EOF && bodyset[static_cast<unsigned char>(ch)])
            {
              token += static_cast<char>(ch);
              ch = streambuf->snextc();
            }
          }
          else if ('0' <= ch && ch <= '9')
          {
            // Number: a maximal run of decimal digits.
            type = NUMBER;
            token = static_cast<char>(ch);
            ch = streambuf->sgetc();
            while (('0' <= ch && ch <= '9') && ch != EOF)
            {
              token += static_cast<char>(ch);
              ch = streambuf->snextc();
            }
          }
          else
          {
            // Anything else is returned as a single-character CHAR token.
            type = CHAR;
            token = static_cast<char>(ch);
          }
          return;
      }  // switch (ch)
    }

    // if we get this far it means we have peeked so we should
    // return the peek data.
    type = next_type;
    token = next_token;
    have_peeked = false;
  }

  // ----------------------------------------------------------------------------------------

  int tokenizer_kernel_1::peek_type() const
  {
    const_cast<tokenizer_kernel_1*>(this)->get_token(next_type, next_token);
    have_peeked = true;
    return next_type;
  }

  // ----------------------------------------------------------------------------------------

  const std::string& tokenizer_kernel_1::peek_token() const
  {
    const_cast<tokenizer_kernel_1*>(this)->get_token(next_type, next_token);
    have_peeked = true;
    return next_token;
  }

  // ----------------------------------------------------------------------------------------

  void tokenizer_kernel_1::swap(tokenizer_kernel_1& item)
  {
    // Member-wise swap of all state with 'item'; each exchange is
    // independent, so the order is arbitrary.
    exchange(have_peeked, item.have_peeked);
    exchange(next_type, item.next_type);
    exchange(next_token, item.next_token);
    exchange(in, item.in);
    exchange(streambuf, item.streambuf);
    exchange(head, item.head);
    exchange(body, item.body);
    exchange(headset, item.headset);
    exchange(bodyset, item.bodyset);
  }

  // ----------------------------------------------------------------------------------------

  void tokenizer_kernel_1::set_identifier_token(const std::string& head_, const std::string& body_)
  {
    using namespace std;

    // Install the caller-supplied identifier definition and rebuild the
    // per-character lookup tables to match.
    head = head_;
    body = body_;

    unsigned long i = 0;
    while (i < UCHAR_MAX)
    {
      headset[i] = false;
      bodyset[i] = false;
      ++i;
    }

    string::size_type pos;
    for (pos = 0; pos < head.size(); ++pos)
      headset[static_cast<unsigned char>(head[pos])] = true;
    for (pos = 0; pos < body.size(); ++pos)
      bodyset[static_cast<unsigned char>(body[pos])] = true;
  }

  // ----------------------------------------------------------------------------------------

  const std::string tokenizer_kernel_1::get_identifier_head() const
  {
    // Hand back a copy of the current head-character set.
    std::string result(head);
    return result;
  }

  // ----------------------------------------------------------------------------------------

  const std::string tokenizer_kernel_1::get_identifier_body() const
  {
    // Hand back a copy of the current body-character set.
    std::string result(body);
    return result;
  }

  // ----------------------------------------------------------------------------------------

  const std::string tokenizer_kernel_1::lowercase_letters() const
  {
    // The 26 lowercase ASCII letters.
    return "abcdefghijklmnopqrstuvwxyz";
  }

  // ----------------------------------------------------------------------------------------

  const std::string tokenizer_kernel_1::uppercase_letters() const
  {
    // The 26 uppercase ASCII letters.
    return "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  }

  // ----------------------------------------------------------------------------------------

  const std::string tokenizer_kernel_1::numbers() const
  {
    // The 10 decimal digit characters.
    return "0123456789";
  }

  // ----------------------------------------------------------------------------------------

}  // namespace dlib
#endif  // DLIB_TOKENIZER_KERNEL_1_CPp_
